/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.paa;

import java.util.Arrays;

import mosdi.fa.CDFA;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.util.ProductEncoder;

// TODO: THIS CLASS IS COMPLETELY UNTESTED!!!!!!

/** For a joint text/annotation model, computes the distribution of the sum of annotations 
 *  of pattern occurrences in a clump. */
public class ClumpAnnotationSumCalculator {
	private int annotationAlphabetSize;
	private ProductEncoder productAlphabet;
	private CDFA cdfa;
	private int patternLength;
	private int maxAnnotationSum;
	private FiniteMemoryTextModel textModel;

	/** Constructor.
	 * 
	 * @param textModel Joint model of text and annotation.
	 * @param productAlphabet Encoder to combine a character and an annotation
	 *                        into character of joint alphabet. First component: text,
	 *                        second component: text.
	 * @param patternLength Length of pattern recognized by CDFA. 
	 */
	public ClumpAnnotationSumCalculator(FiniteMemoryTextModel textModel, ProductEncoder productAlphabet,	CDFA cdfa, int patternLength) {
		if (1==1) throw new IllegalStateException("THIS CLASS IS COMPLETELY UNTESTED, DON'T USE IT!");
		this.productAlphabet = productAlphabet;
		this.cdfa = cdfa;
		this.patternLength = patternLength;
		this.textModel = textModel;
	}

	/** DAA with state space (CDFA states)x(annotation history), value space and
	 *  oprations are left undefined (to be provided by derived classes). */
	private abstract class AnnotationDAA extends DAA {
		protected ProductEncoder stateEncoder;
		/** Encodes a window of sequence annotations. */ 
		protected ProductEncoder historyEncoder;
		/** Constructor. */
		public AnnotationDAA() {
			int[] bases = new int[patternLength];
			Arrays.fill(bases, annotationAlphabetSize);
			historyEncoder = new ProductEncoder(false, annotationAlphabetSize);
			stateEncoder = new ProductEncoder(cdfa.getStateCount(), historyEncoder.getValueCount());
		}
		@Override
		public int getAlphabetSize() { return productAlphabet.getValueCount(); }
		@Override
		public int getEmissionCount() {
			return maxAnnotationSum*patternLength+1;
		}
		@Override
		public int getEmission(int state) {
			int[] decodedState = stateEncoder.decode(state);
			if (cdfa.getStateOutput(decodedState[0])==0) return 0;
			int[] annotations = historyEncoder.decode(decodedState[1]);
			int sum = 0;
			for (int a : annotations) sum+=a;
			return sum; 
		}
		@Override
		public int getStartState() { return stateEncoder.encode(cdfa.getStartState(), 0); }
		@Override
		public int getStateCount() { return stateEncoder.getValueCount(); }
		@Override
		public int getTransitionTarget(int state, int character) {
			int[] decodedChar = productAlphabet.decode(character);
			int[] decodedState = stateEncoder.decode(state);
			// perform CDFA transition
			decodedState[0] = cdfa.getTransitionTarget(decodedState[0], decodedChar[0]);
			// update window
			decodedState[1] = (decodedState[1]*annotationAlphabetSize + decodedChar[1]) % historyEncoder.getValueCount();
			return stateEncoder.encode(decodedState);
		}
	}

	/** Purpose: computation of clump start distribution; i.e. the distribution of 
	 *  states at clump start positions. */
	private class ClumpStartDAA extends AnnotationDAA {
		@Override
		public int getStartValue() {
			return patternLength;
		}
		@Override
		public int getValueCount() { return patternLength+2; }
		@Override
		public int performOperation(int state, int value, int emission) {
			if (value==patternLength+1) value = 0;
			value = Math.min(patternLength, value+1);
			int[] decodedState = stateEncoder.decode(state); 
			if (cdfa.getStateOutput(decodedState[0])>0){
				return (value==patternLength)?patternLength+1:0;
			}
			return value;
		}
	}
	
	/** Purpose: computation of sum of match annotations in a clump. */
	private class ClumpAnnotationSumDAA extends AnnotationDAA {
		/** Value consists of two components: (annotation sum)x(characters since last match position)*/
		private ProductEncoder valueEncoder;
		private ClumpAnnotationSumDAA() {
			valueEncoder = new ProductEncoder(maxAnnotationSum+1,patternLength+1);
		}
		@Override
		public int getStartValue() { return 0; }
		@Override
		public int getValueCount() { return valueEncoder.getValueCount(); }
		@Override
		public int performOperation(int state, int value, int emission) {
			int cdfaState = stateEncoder.decodeComponent(0, state);
			int[] decodedValue = valueEncoder.decode(value);
			if (decodedValue[1]>=patternLength-1) {
				// the special value patternLength indicates
				// that the clump has ended
				decodedValue[1] = patternLength;
			} else {
				if (cdfa.getStateOutput(cdfaState)==0) {
					decodedValue[1] += 1;
				} else {
					decodedValue[0] = Math.min(decodedValue[0]+emission, maxAnnotationSum);
					decodedValue[1] = 0;
				}
			}
			return valueEncoder.encode(decodedValue);
		}
		public int encodeValue(int annotationSum, int distanceToLastMatch) {
			return valueEncoder.encode(annotationSum, distanceToLastMatch);
		}
	}
	
	/** Compute the distribution of annotation sums of matches in a clump.
	 *  @param accuracy Entries in returned array sum to >=1.0-accuracy.
	 */
	public double[] annotationSumDistribution(int maxAnnotationSum, double accuracy) {
		this.maxAnnotationSum = maxAnnotationSum;
		DAA clumpStartDaa = new ClumpStartDAA();
		ClumpAnnotationSumDAA annotationSumDaa = new ClumpAnnotationSumDAA();
		TextBasedPAA clumpStartPaa = new TextBasedPAA(clumpStartDaa, textModel);
		TextBasedPAA annotationSumPaa = new TextBasedPAA(annotationSumDaa, textModel);
		assert(clumpStartPaa.getStateCount() == annotationSumPaa.getStateCount());
		int stateCount = annotationSumPaa.getStateCount();
		// Step 1: Determine state/value distribution at clump start positions,
		//         i.e. at positions where the first match in a clump ends. Here, a smaller
		//         value space that only keeps track of distance to last match is sufficient
		double[][] clumpStartEq = clumpStartPaa.convergeToStateValueEquilibrium(1e-13, 1000000);
		// Step 2: Normalize last column to 1.0
		double sum = 0.0;
		for (int state=0; state<stateCount; ++state) sum += clumpStartEq[state][patternLength+1];
		for (int state=0; state<stateCount; ++state) clumpStartEq[state][patternLength+1] /= sum;
		// Step 2: Translate into state/value distribution with complete value space (also keeping
		//         track of annotation sums accumulated so far).
		double[][] distribution = new double[annotationSumPaa.getStateCount()][annotationSumPaa.getValueCount()];
		for (int state=0; state<stateCount; ++state) {
			double p = clumpStartEq[state][patternLength+1]; 
			if (p>0.0) {
				int value = annotationSumDaa.encodeValue(annotationSumDaa.getEmission(state), 0);
				distribution[state][value] = p;
			}
		}
		// Step 3: Compute annotation sum distribution
		double[] result = new double[maxAnnotationSum+1]; 
		double probabilityMassLeft = 1.0;
		while (probabilityMassLeft>=accuracy) {
			for (int annotationSum=0; annotationSum<=maxAnnotationSum; ++annotationSum) {
				int value = annotationSumDaa.encodeValue(annotationSum, patternLength-1);
				for (int state=0; state<stateCount; ++state) {
					if (distribution[state][value]>0.0) {
						result[annotationSum] += distribution[state][value];
					}
				}
			}
			// perform one time step
			distribution = annotationSumPaa.updateStateValueDistribution(distribution);
			// sum up mass that is left
			probabilityMassLeft = 0.0;
			for (int annotationSum=0; annotationSum<=maxAnnotationSum; ++annotationSum) {
				for (int x=0; x<patternLength; ++x) {
					int value = annotationSumDaa.encodeValue(annotationSum, x);
					for (int state=0; state<stateCount; ++state) {
						probabilityMassLeft += distribution[state][value];
					}
					if (probabilityMassLeft>=accuracy) break;
				}
				if (probabilityMassLeft>=accuracy) break;
			}
		}
		return result;
	}

}
